Detailed survival analysis of the `lung` data set from the survival package.

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Table formatting used by every pander() call below:
# 'digits' set to 3 and trailing zeros kept.
pander::panderOptions("digits", 3)
pander::panderOptions("keep.trailing.zeros", TRUE)
# Snapshot of the modifiable graphical parameters, stored in `op`.
op <- par(no.readonly = TRUE)

Data: the lung cancer data set

# Take an explicit working copy of the lung data from the survival package.
# data(lung) emits "data set 'lung' not found" with the installed survival
# release, so the data set is referenced through the package namespace
# instead; this is warning-free and makes the origin of `lung` unambiguous.
lung <- survival::lung
# The `inst` column is not used anywhere in this analysis.
lung$inst <- NULL
# Shift the status coding down by one so that it becomes 0/1
# (the table printed right after this chunk shows the two resulting levels).
lung$status <- lung$status - 1
# Keep only the rows without missing values in any remaining column.
lung <- lung[complete.cases(lung), ]

# Count of rows per status value after the 0/1 recoding.
pander::pander(table(lung$status))
0 1
47 121
# summary() of the time column: min, quartiles, median, mean and max.
pander::pander(summary(lung$time))
Min. 1st Qu. Median Mean 3rd Qu. Max.
5 175 268 310 416 1022

Exploring Raw Features with RRPlot

# A column is treated as continuous when it has more than 10 distinct values.
# vapply() over the data-frame columns always returns exactly one count per
# column. The previous apply(lung, 2, unique) first coerces the data frame to
# a matrix and returns a matrix (not a list) whenever all columns happen to
# have the same number of distinct values, which silently breaks the
# per-column length test.
n_distinct <- vapply(lung, function(col) length(unique(col)), integer(1))
convar <- names(n_distinct)[n_distinct > 10]
# The follow-up time is part of the outcome, not a candidate feature.
convar <- convar[convar != "time"]
# Univariate ranking of the continuous features against the binary status.
topvar <- univariate_BinEnsemble(lung[, c("status", convar)], "status")
pander::pander(topvar)
age wt.loss
0.106 0.106
# Keep at most the five first entries of topvar. head() is safe when topvar
# is empty, whereas names(topvar)[1:topv] with topv == 0 indexes with c(1, 0)
# and yields a spurious NA feature name that the loop would then try to plot.
topFive <- head(names(topvar), 5)
topv <- length(topFive)

# One RRPlot() result per selected feature, stored under the feature name.
RRanalysis <- vector("list", length(topFive))
names(RRanalysis) <- topFive
for (topf in topFive) {
  # Column 1: observed status; column 2: the raw feature used as the score.
  # (The original call ended with a dangling comma before ")", i.e. an empty
  # extra argument; it is removed here. The disabled option in the original
  # was `plotRR=FALSE`.)
  RRanalysis[[topf]] <- RRPlot(cbind(lung$status, lung[, topf]),
                               atRate = c(0.90),
                               timetoEvent = lung$time,
                               title = topf)
}

Reporting the Metrics

# Stack each reported metric across the selected features into one matrix per
# metric (one row per feature, in topFive order).
ROCAUC <- do.call(
  rbind,
  lapply(topFive, function(f) RRanalysis[[f]]$ROCAnalysis$aucs)
)
CstatCI <- do.call(
  rbind,
  lapply(topFive, function(f) RRanalysis[[f]]$c.index$cstatCI)
)
LogRangp <- do.call(
  rbind,
  lapply(topFive, function(f) RRanalysis[[f]]$surdif$pvalue)
)
Sensitivity <- do.call(
  rbind,
  lapply(topFive, function(f) RRanalysis[[f]]$ROCAnalysis$sensitivity)
)
Specificity <- do.call(
  rbind,
  lapply(topFive, function(f) RRanalysis[[f]]$ROCAnalysis$specificity)
)

# Label every row with the feature it belongs to.
rownames(ROCAUC) <- topFive
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive

# Print the per-feature metric matrices built above, one table per metric.
# ROCAnalysis$aucs per feature (columns est/lower/upper).
pander::pander(ROCAUC)
  est lower upper
age 0.591 0.494 0.688
wt.loss 0.549 0.451 0.647
# c.index$cstatCI per feature.
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.558 0.558 0.498 0.616
wt.loss 0.509 0.509 0.452 0.569
# surdif$pvalue per feature (single column).
pander::pander(LogRangp)
age 0.919
wt.loss 0.358
# ROCAnalysis$sensitivity per feature.
pander::pander(Sensitivity)
  est lower upper
age 0.1157 0.0647 0.187
wt.loss 0.0496 0.0184 0.105
# ROCAnalysis$specificity per feature.
pander::pander(Specificity)
  est lower upper
age 0.872 0.743 0.952
wt.loss 0.894 0.769 0.965
# First column of every metric matrix, side by side, one row per feature.
meanMatrix <- cbind(ROCAUC = ROCAUC[, 1],
                    "C-Stat" = CstatCI[, 1],
                    Sen = Sensitivity[, 1],
                    Spe = Specificity[, 1])
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe
age 0.591 0.558 0.1157 0.872
wt.loss 0.549 0.509 0.0496 0.894

Modeling

# Fit BSWiMS.model on lung with a Surv(time, status) outcome and 10 repeats.
ml <- BSWiMS.model(
  Surv(time, status) ~ 1,
  data = lung,
  NumberofRepeats = 10
)

[+++++++++++++++++++++++++++++]..

# Summarise the fitted model and tabulate its coefficient table.
sm <- summary(ml)
pander::pander(sm[["coefficients"]])
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
ph.ecog 4.32e-01 1.194 1.541 1.988 0.679 0.649
sex -4.59e-01 0.456 0.632 0.876 0.649 0.679
pat.karno -1.77e-03 0.997 0.998 1.000 0.506 0.720
ph.karno -4.64e-07 1.000 1.000 1.000 0.577 0.720
age 4.57e-08 1.000 1.000 1.000 0.565 0.720
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
ph.ecog 0.601 0.601 0.620 0.600 0.0449 0.405
sex 0.601 0.620 0.601 0.600 0.0285 0.478
pat.karno 0.506 0.585 0.500 0.585 0.0292 0.342
ph.karno 0.577 0.570 0.500 0.570 0.0143 0.280
age 0.565 0.549 0.500 0.549 0.0162 0.195
  z.IDI z.NRI Delta.AUC Frequency
ph.ecog 3.33 2.48 -0.02005 1.0
sex 2.76 2.85 -0.00167 1.0
pat.karno 2.44 2.24 0.08546 1.0
ph.karno 2.22 1.64 0.06998 0.8
age 1.97 1.14 0.04871 0.1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events within a fixed time interval, set below to twice the mean time-to-event of the subjects with an observed event.

# Time window: twice the mean `time` of the rows with status == 1.
timeinterval <- 2 * mean(lung$time[which(lung$status == 1)])

# Initial baseline estimate: events inside the window divided by the rows
# that either had an event or were followed beyond the window.
h0 <- with(
  lung,
  sum(status & time <= timeinterval) /
    sum(time > timeinterval | status == 1)
)
pander::pander(
  t(c(h0 = h0, timeinterval = timeinterval)),
  caption = "Initial Parameters"
)
Initial Parameters
h0 timeinterval
0.85 578
# Model output for every row of lung, stored in `index`.
index <- predict(ml, lung)

# Two-column input for RRPlot(): observed status and ppoisGzero(index, h0).
rdata <- cbind(lung$status, ppoisGzero(index, h0))

# Evaluate the uncalibrated model on the training data.
rrAnalysisTrain <- RRPlot(
  rdata,
  atRate = c(0.90),
  timetoEvent = lung$time,
  title = "Raw Train: Lung Cancer",
  ysurvlim = c(0.00, 1.0),
  riskTimeInterval = timeinterval
)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

# Report for the uncalibrated training evaluation stored in rrAnalysisTrain.
# Each pander() call prints one element of the RRPlot() result.
# keyPoints, transposed so that the thresholds become columns.
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.649 0.478 3.39e-01 3.39e-01 0.493
RR 1.214 1.742 6.85e+01 6.85e+01 1.270
RR_LCI 1.014 1.260 1.44e-01 1.44e-01 1.037
RR_UCI 1.454 2.408 3.26e+04 3.26e+04 1.555
SEN 0.314 0.826 1.00e+00 1.00e+00 0.612
SPE 0.830 0.511 1.91e-01 1.91e-01 0.596
BACC 0.572 0.669 5.96e-01 5.96e-01 0.604
NetBenefit 0.138 0.470 6.04e-01 6.04e-01 0.331
# OERatio$estimate: O/E value with its bounds and p-value.
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.65 1.37 1.97 3.16e-07
# OE95ci: mean, median and 2.5%/97.5% quantiles of O/E.
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.23 1.23 1.19 1.27
# OAcum95ci: same layout for the O/Acum statistic.
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.2 1.2 1.19 1.21
# c.index$cstatCI is printed without t(), unlike the elements above.
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.651 0.651 0.587 0.712
# ROCAnalysis: AUC, sensitivity and specificity, each as est/lower/upper.
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.691 0.598 0.784
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.314 0.233 0.405
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.83 0.692 0.924
# thr_atP: the probability threshold matching atRate = 0.90 in the RRPlot() call.
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.648
# surdif: log-rank comparison of the two risk classes.
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 7.945448 on 1 degrees of freedom, p = 0.004821
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 122 83 95.6 1.65 7.95
class=1 46 38 25.4 6.22 7.95

Cox Calibration

# Snapshot of the modifiable graphical parameters, stored in `op`.
op <- par(no.readonly = TRUE)

# Calibrate the fitted model against the observed status and time columns.
calprob <- CoxRiskCalibration(ml, lung, "status", "time")

# Show the three calibration parameters returned in calprob.
pander::pander(
  c(
    h0 = calprob$h0,
    Gain = calprob$hazardGain,
    DeltaTime = calprob$timeInterval
  ),
  caption = "Cox Calibration Parameters"
)
h0 Gain DeltaTime
1.29 1.52 749

The RRPlot() of the calibrated model

# From here on, use the calibrated baseline and time interval.
h0 <- calprob$h0
timeinterval <- calprob$timeInterval

# Observed status next to the calibrated probabilities.
rdata <- cbind(lung$status, calprob$prob)

# Re-run the training evaluation with the calibrated probabilities.
rrAnalysisTrain <- RRPlot(
  rdata,
  atRate = c(0.90),
  timetoEvent = lung$time,
  title = "Train: Lung",
  ysurvlim = c(0.00, 1.0),
  riskTimeInterval = timeinterval
)

Calibrated Train Performance

# Report for the calibrated training evaluation (rrAnalysisTrain was
# reassigned by the RRPlot() call on calprob$prob just above).
# keyPoints, transposed so that the thresholds become columns.
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.7963 0.628 4.67e-01 4.67e-01 0.479
RR 1.2142 1.742 6.85e+01 6.85e+01 2.784
RR_LCI 1.0143 1.260 1.44e-01 1.44e-01 1.315
RR_UCI 1.4536 2.408 3.26e+04 3.26e+04 5.893
SEN 0.3140 0.826 1.00e+00 1.00e+00 0.959
SPE 0.8298 0.511 1.91e-01 1.91e-01 0.277
BACC 0.5719 0.669 5.96e-01 5.96e-01 0.618
NetBenefit 0.0401 0.365 5.22e-01 5.22e-01 0.504
# OERatio$estimate: O/E value with its bounds and p-value.
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.2 1.73 0.000124
# OE95ci: mean, median and 2.5%/97.5% quantiles of O/E.
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.05 1.02 1.1
# OAcum95ci: same layout for the O/Acum statistic.
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1 1 0.996 1.01
# c.index$cstatCI is printed without t(), unlike the elements above.
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.651 0.65 0.588 0.711
# ROCAnalysis: AUC, sensitivity and specificity, each as est/lower/upper.
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.691 0.598 0.784
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.314 0.233 0.405
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.83 0.692 0.924
# thr_atP: the probability threshold matching atRate = 0.90 in the RRPlot() call.
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.795
# surdif: log-rank comparison of the two risk classes.
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 7.945448 on 1 degrees of freedom, p = 0.004821
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 122 83 95.6 1.65 7.95
class=1 46 38 25.4 6.22 7.95

Cross-Validation

# Repeated random-split cross-validation of BSWiMS.model on lung:
# 200 repetitions with trainFraction = 0.95.
rcv <- randomCV(
  theData = lung,
  theOutcome = Surv(time, status) ~ 1,
  fittingFunction = BSWiMS.model,
  trainFraction = 0.95,
  repetitions = 200,
  classSamplingType = "Pro"
)

.[+++].[+++].[++-].[+++].[+++].[++++].[+++].[+++].[++].[+++]10 Tested: 74 Avg. Selected: 3.8 Min Tests: 1 Max Tests: 4 Mean Tests: 1.351351 . MAD: 0.4953868

.[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[++-].[++]20 Tested: 114 Avg. Selected: 3.75 Min Tests: 1 Max Tests: 4 Mean Tests: 1.754386 . MAD: 0.488174

.[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[+]30 Tested: 138 Avg. Selected: 3.733333 Min Tests: 1 Max Tests: 6 Mean Tests: 2.173913 . MAD: 0.4841222

.[+++].[++].[+].[+].[+++].[++++].[+++].[+++].[+++].[++]40 Tested: 149 Avg. Selected: 3.675 Min Tests: 1 Max Tests: 7 Mean Tests: 2.684564 . MAD: 0.4813307

.[+++].[++].[+++].[+++].[+++].[++].[+++].[++].[++++].[+++]50 Tested: 162 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 9 Mean Tests: 3.08642 . MAD: 0.4792541

.[+++].[++++].[++++].[+++].[+++].[+++].[+++].[+++].[+++].[++]60 Tested: 164 Avg. Selected: 3.75 Min Tests: 1 Max Tests: 10 Mean Tests: 3.658537 . MAD: 0.4800293

.[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[++]70 Tested: 166 Avg. Selected: 3.757143 Min Tests: 1 Max Tests: 11 Mean Tests: 4.216867 . MAD: 0.4783108

.[++].[+++].[+++].[++].[+++].[+++].[++].[+++].[+++].[+++]80 Tested: 167 Avg. Selected: 3.75 Min Tests: 1 Max Tests: 12 Mean Tests: 4.790419 . MAD: 0.4769977

.[+].[+++].[++].[++].[+++].[+++].[+++].[++].[++].[++]90 Tested: 167 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 12 Mean Tests: 5.389222 . MAD: 0.4768981

.[+++].[++].[+++].[+++].[++].[+++].[+++].[++++].[++].[+++]100 Tested: 167 Avg. Selected: 3.71 Min Tests: 1 Max Tests: 13 Mean Tests: 5.988024 . MAD: 0.476791

.[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[+++].[++++]110 Tested: 168 Avg. Selected: 3.736364 Min Tests: 1 Max Tests: 16 Mean Tests: 6.547619 . MAD: 0.4754411

.[+++].[++-].[+++].[+++].[++].[+++].[+++].[++].[++].[+++]120 Tested: 168 Avg. Selected: 3.725 Min Tests: 2 Max Tests: 17 Mean Tests: 7.142857 . MAD: 0.4753261

.[++++].[+++].[++].[++].[++].[+++].[++++].[+++].[+++].[+++]130 Tested: 168 Avg. Selected: 3.738462 Min Tests: 2 Max Tests: 17 Mean Tests: 7.738095 . MAD: 0.4750343

.[+++].[+++].[++].[++].[+++].[+++].[+++].[++].[++-].[+++]140 Tested: 168 Avg. Selected: 3.728571 Min Tests: 2 Max Tests: 18 Mean Tests: 8.333333 . MAD: 0.4752763

.[+++].[+++].[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++]150 Tested: 168 Avg. Selected: 3.74 Min Tests: 2 Max Tests: 18 Mean Tests: 8.928571 . MAD: 0.475544

.[+++].[++].[+++].[+++].[+++].[+++].[+++].[++].[+++].[+]160 Tested: 168 Avg. Selected: 3.73125 Min Tests: 2 Max Tests: 20 Mean Tests: 9.52381 . MAD: 0.475665

.[++++].[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++]170 Tested: 168 Avg. Selected: 3.747059 Min Tests: 2 Max Tests: 21 Mean Tests: 10.11905 . MAD: 0.4755018

.[+++].[++].[+++].[+++].[++].[+++].[++].[+++].[+++].[+++]180 Tested: 168 Avg. Selected: 3.744444 Min Tests: 2 Max Tests: 21 Mean Tests: 10.71429 . MAD: 0.4756397

.[++].[+++].[+++].[++].[+++].[+++].[+++].[++].[+++].[+++]190 Tested: 168 Avg. Selected: 3.742105 Min Tests: 3 Max Tests: 22 Mean Tests: 11.30952 . MAD: 0.4756545

.[+++].[+++].[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++]200 Tested: 168 Avg. Selected: 3.75 Min Tests: 3 Max Tests: 25 Mean Tests: 11.90476 . MAD: 0.4755465

# Collapse the repeated cross-validation predictions to one value per subject
# and evaluate them with RRPlot().
# NOTE(review): column meaning is inferred from how the columns are used below
# (1 = time, 2 = status, 4 = model prediction) -- confirm against randomCV().
stp <- rcv$survTestPredictions
# Drop the rows whose 4th column is missing.
stp <- stp[!is.na(stp[,4]),]

# boxplot(..., plot=FALSE) is used only for its grouped statistics: rows are
# grouped by row name and row 3 of $stats is the per-group median.
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
# h0 is the value currently in scope; earlier in this script it was last set
# from calprob$h0 (the training calibration).
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

# All three boxplot() calls group by the same row names, so bbx$names labels
# status, prob and times alike.
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

# timeinterval is likewise the value currently in scope.
rrAnalysisTest <- RRPlot(rdatacv,atRate=c(0.90),
                     timetoEvent=times,
                     title="Test: Lung Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

# Report for the cross-validation test evaluation stored in rrAnalysisTest.
# Each pander() call prints one element of the RRPlot() result.
# keyPoints, transposed so that the thresholds become columns.
pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.8074 0.605 0.605 0.438 0.515
RR 1.1859 2.958 2.958 1.000 2.612
RR_LCI 0.9720 1.387 1.387 0.000 1.244
RR_UCI 1.4469 6.309 6.309 0.000 5.483
SEN 0.1983 0.959 0.959 1.000 0.959
SPE 0.8936 0.298 0.298 0.000 0.255
BACC 0.5460 0.628 0.628 0.500 0.607
NetBenefit 0.0181 0.390 0.390 0.502 0.469
# OERatio$estimate: O/E value with its bounds and p-value.
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.2 1.73 0.000124
# OE95ci: mean, median and 2.5%/97.5% quantiles of O/E.
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.05 1.05 1.02 1.09
# OAcum95ci: same layout for the O/Acum statistic.
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.951 0.952 0.941 0.962
# c.index$cstatCI is printed without t(), unlike the elements above.
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.603 0.602 0.537 0.67
# ROCAnalysis: AUC, sensitivity and specificity, each as est/lower/upper.
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.612 0.512 0.712
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.19 0.124 0.271
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
# thr_atP: the probability threshold matching atRate = 0.90 in the RRPlot() call.
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.808
# surdif: log-rank comparison of the two risk classes.
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 2.346444 on 1 degrees of freedom, p = 0.125569
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 140 98 103.8 0.328 2.35
class=1 28 23 17.2 1.981 2.35

Calibrating the test results

# Status, predicted probability and time per subject, passed as a three-column
# matrix to the calibration routine.
rdatacv <- cbind(status = status, prob = prob, times = times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

# Show the three calibration parameters returned in calprob.
pander::pander(
  c(
    h0 = calprob$h0,
    Gain = calprob$hazardGain,
    DeltaTime = calprob$timeInterval
  ),
  caption = "Cox Calibration Parameters"
)
h0 Gain DeltaTime
0.85 1 753
# Use the time interval returned by the test-set calibration.
timeinterval <- calprob$timeInterval

# Observed status next to the calibrated test probabilities.
rdata <- cbind(status, calprob$prob)

# Evaluate the calibrated cross-validation predictions.
rrAnalysisTest <- RRPlot(
  rdata,
  atRate = c(0.90),
  timetoEvent = times,
  title = "Calibrated Test: Lung",
  ysurvlim = c(0.00, 1.0),
  riskTimeInterval = timeinterval
)

Calibrated Test Performance

# Report for the calibrated test evaluation (rrAnalysisTest was reassigned by
# the RRPlot() call on calprob$prob just above).
# keyPoints, transposed so that the thresholds become columns.
pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.8074 0.605 0.605 0.438 0.515
RR 1.1859 2.958 2.958 1.000 2.612
RR_LCI 0.9720 1.387 1.387 0.000 1.244
RR_UCI 1.4469 6.309 6.309 0.000 5.483
SEN 0.1983 0.959 0.959 1.000 0.959
SPE 0.8936 0.298 0.298 0.000 0.255
BACC 0.5460 0.628 0.628 0.500 0.607
NetBenefit 0.0181 0.390 0.390 0.502 0.469
# OERatio$estimate: O/E value with its bounds and p-value.
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.2 1.73 9.73e-05
# OE95ci: mean, median and 2.5%/97.5% quantiles of O/E.
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.02 1.1
# OAcum95ci: same layout for the O/Acum statistic.
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.951 0.952 0.941 0.962
# c.index$cstatCI is printed without t(), unlike the elements above.
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.603 0.601 0.536 0.664
# ROCAnalysis: AUC, sensitivity and specificity, each as est/lower/upper.
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.612 0.512 0.712
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.19 0.124 0.271
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
# thr_atP: the probability threshold matching atRate = 0.90 in the RRPlot() call.
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.808
# surdif: log-rank comparison of the two risk classes.
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 2.346444 on 1 degrees of freedom, p = 0.125569
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 140 98 103.8 0.328 2.35
class=1 28 23 17.2 1.981 2.35